package org.zjvis.datascience.common.util;

import cn.hutool.core.util.CharUtil;
import cn.hutool.db.Entity;
import com.monitorjbl.xlsx.StreamingReader;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;
import org.zjvis.datascience.common.constant.Constant;
import org.zjvis.datascience.common.constant.DatasetConstant;
import org.zjvis.datascience.common.enums.FileTypeEnum;
import org.zjvis.datascience.common.exception.DataScienceException;
import org.zjvis.datascience.common.vo.dataset.HeadVO;
import org.zjvis.datascience.common.vo.dataset.PreviewDatasetVO;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.sql.JDBCType;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.zjvis.datascience.common.util.ToolUtil.checkDuplicateName;

/**
 * @description 文件上传工具类
 * @date 2021-11-01
 */
public class FileImportUtil {


    private static final Logger logger = LoggerFactory.getLogger(FileImportUtil.class);
    private static final NumberFormat numberFormat = NumberFormat.getNumberInstance();

    /**
     * 获取excel Workbook
     *
     * @param file
     */
    public static Workbook readExcel(MultipartFile file) throws IOException {
        String fileName = file.getOriginalFilename();
        Workbook wk;
        assert fileName != null;
        if (fileName.toLowerCase().endsWith(Constant.EXCEL_XLS)) {
            wk = new HSSFWorkbook(file.getInputStream());
        } else {
            if (file.getSize() > 50 * 1024 * 1024) {
                wk = StreamingReader.builder()
                        .rowCacheSize(10000)  //缓存到内存中的行数，默认是10
                        .bufferSize(1024 * 1024 * 100)  //读取资源时，缓存到内存的字节大小，默认是1024
                        .open(file.getInputStream());  //打开资源，必须，可以是InputStream或者是File，注意：只能打开XLSX格式的文件;
            } else {
                wk = new XSSFWorkbook(file.getInputStream());
            }
        }
        return wk;
    }

    /**
     * 读取一个单元格
     *
     * @param cell
     * @return
     */
    public static String readCell(Cell cell) {
        String field;
        CellType cellType = cell.getCellType();
        /* 特殊格式数据处理 */
        switch (cellType) {
            case NUMERIC:
                /* 处理日期及时间格式 */
                if (DateUtil.isCellDateFormatted(cell)) {
                    double value = cell.getNumericCellValue();
                    cell.getDateCellValue();
                    SimpleDateFormat sf;
                    if (value > 1) {
                        sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    } else {
                        sf = new SimpleDateFormat("HH:mm:ss");
                    }
                    field = sf.format(cell.getDateCellValue());
                } else {
                    switch (cell.getCellStyle().getDataFormat()) {
                        /*
                         * 自定义日期及时间格式，上面条件判断不出来
                         * yyyy-MM-dd----- 14
                         * yyyy年m月d日--- 31
                         * yyyy年m月------- 57
                         * m月d日  ---------- 58
                         * HH:mm----------- 20
                         * h时mm分  ------- 32
                         */
                        case 14:
                        case 20:
                        case 31:
                        case 32:
                        case 57:
                        case 58:
                            double value = cell.getNumericCellValue();
                            cell.getDateCellValue();
                            SimpleDateFormat sf;
                            if (value > 1) {
                                sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                            } else {
                                sf = new SimpleDateFormat("HH:mm:ss");
                            }

                            field = sf.format(cell.getDateCellValue());
                            break;
                        default:
                            /*
                             * 如果不处理，获取的数据是double类型。
                             * 这样处理之后数字获取到的还是整数字符串，且精度不丢失，
                             * 样就不会给用户造成数据类型误判。
                             * */
                            numberFormat.setGroupingUsed(false);
                            numberFormat.setMaximumFractionDigits(9);
                            field = numberFormat.format(cell.getNumericCellValue());
                            break;
                    }
                }
                break;
            case BOOLEAN:
                field = String.valueOf(cell.getBooleanCellValue());
                break;
            case ERROR:
//                field = String.valueOf(cell.getErrorCellValue());
                field = "";
                break;
            case STRING:
                field = cell.getStringCellValue();
                break;
            default:
                /*
                 * 上面几个不统一按这种方式处理是因为：曾经遇到过特殊文件的数据，按照下面转换类型
                 * 之后，value会变空字符串
                 * */
                cell.setCellType(CellType.STRING);
                field = cell.getStringCellValue();
                break;
        }
        return field;
    }

    /**
     * 设置首行为列名
     *
     * @param row
     * @return
     */
    public static List<String> setHeader(Row row, int last) {
        List<String> nameList = new ArrayList<>();
        for (int l = 0; l < last; l++) {
            String name;
            Cell cell = row.getCell(l, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
            if (cell == null) {
                //使用默认字段
                name = DatasetConstant.DEFAULT_FIELD_NAME + (l + 1);
            } else {
                name = readCell(cell);
                if (name == null || name.equals("")) {
                    name = DatasetConstant.DEFAULT_FIELD_NAME + (l + 1);
                }
                //特殊字符处理
                name = illegalChar(name);
            }
            //待上传数据重名检查，并返回新的字段名
            checkDuplicateName(name, nameList, true);
        }
        return nameList;
    }

    /**
     * 设置默认列名
     *
     * @param columnsNumber 列数
     * @return
     */
    public static List<String> setDefaultHeader(int columnsNumber) {
        List<String> list = new ArrayList<>();
        for (int n = 0; n < columnsNumber; n++) {
            list.add(DatasetConstant.DEFAULT_FIELD_NAME + (n + 1));
        }
        return list;
    }

    /**
     * 异常字符处理
     *
     * @param name
     * @return
     */
    public static String illegalChar(String name) {
        name = name.trim().replaceAll(DatasetConstant.ILLEGAL_CHARACTER_REGEX, "");
        if (name.matches(DatasetConstant.SPECIAL_CHARACTER_REGEX)) {
            name = name.replaceAll(DatasetConstant.SPECIAL_CHARACTER_REPLACE, "");
        }
        if (name.length() > 64) {
            //字段名过长就截断
            name = SqlUtil.cutStr(name, 63);
        }
        return name;
    }

    /**
     * 获取首行列名
     *
     * @param row
     * @return
     */
    public static String getHeader(Row row, int last) {
        StringBuilder header = new StringBuilder();
        List<String> list = new ArrayList<>();
        for (int l = 0; l < row.getLastCellNum(); l++) {
            String name;
            Cell cell = row.getCell(l, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
            if (cell == null) {
                //使用默认字段
                name = DatasetConstant.DEFAULT_FIELD_NAME + (l + 1);
            } else {
                name = readCell(cell);
                if (name == null || name.equals("")) {
                    name = DatasetConstant.DEFAULT_FIELD_NAME + (l + 1);
                }
                //特殊字符处理
                name = illegalChar(name);
                if (StringUtils.isBlank(name)) {
                    name = DatasetConstant.DEFAULT_FIELD_NAME + (l + 1);
                }
            }
            //head同名字段处理
            if (list.contains(name)) {
                name += "_" + CellReference.convertNumToColString(l);
            }
            list.add(name);
            header.append(name).append(",");
        }
        header.delete(header.length() - 1, header.length());
        header.append("\n");
        return header.toString();
    }

    public static int getRowNumber(Row row) {
        int last = row.getLastCellNum();
        for (int l = row.getLastCellNum() - 1; l >= 0; l--) {
            Cell cell = row.getCell(l, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
            if (cell == null) {
                last--;
            } else {
                break;
            }
        }
        return last;
    }

    /**
     * 获取默认列名
     *
     * @param columnsNumber 列数
     * @return
     */
    public static String getDefaultHeader(int columnsNumber) {
        StringBuilder header = new StringBuilder();
        for (int n = 1; n < columnsNumber + 1; n++) {
            header.append(DatasetConstant.DEFAULT_FIELD_NAME).append(n).append(",");
        }
        header.delete(header.length() - 1, header.length());
        header.append("\n");
        return header.toString();
    }

    /**
     * 读取一行为字符串
     *
     * @param row
     * @return
     */
    public static StringBuilder readRow(Row row, int lastLine) {
        StringBuilder rowString = new StringBuilder();
        //遍历所有的列
        for (int i = 0; i < lastLine; i++) {
            Cell cell = row.getCell(i);
            String field = "";
            if (cell != null) {
                field = readCell(cell);
            }
            if (field == null) {
                field = "";
            }
            Pattern pattern = Pattern.compile(DatasetConstant.utf8IllegalChar);
            Matcher matcher = pattern.matcher(field);
            field = matcher.replaceAll("");
            if (field.contains("\"")) {
                rowString.append("\"").append(field.replaceAll("\"", "\"\"")).append("\"");
            } else if (field.contains(",")) {

                rowString.append("\"").append(field).append("\"");
            } else {
                rowString.append(field);
            }
            rowString.append(",");
        }
        rowString.delete(rowString.length() - 1, rowString.length());
        rowString.append("\n");
        return rowString;
    }

    /**
     * 读取预览行
     *
     * @param row
     * @param headList 列名
     * @param values   储存数据
     * @return
     */
    public static void readRowForPreview(Row row, List<String> headList, List<Entity> values) {
        Entity rowEntity = new Entity();
        StringBuilder rowString = new StringBuilder();
        //遍历所有的列
        for (int i = 0; i < headList.size(); i++) {
            Cell cell = row.getCell(i);
            String field = "";
            if (cell != null) {
                field = readCell(cell);
            }
            if (field == null) {
                field = "";
            }

            rowString.append(field).append(",");
            rowEntity.set(headList.get(i), field);
        }
        //单行长度临界值判断，单行长度（包括换行符）超过32767，gpload无法处理
        if (rowString.length() > 32767) {
            logger.warn("单行长度临界值判断，单行长度（包括换行符）超过32767，gpload无法处理，此行跳过预览");
        } else {
            rowEntity.setFieldNames(headList);
            values.add(rowEntity);
        }
    }

    /**
     * 根据excel Workbook获取预览数据
     *
     * @param containsHeader 首行是否为列名
     * @return
     */
    public static List<PreviewDatasetVO> readForPreview(Workbook wk, boolean containsHeader,
                                                        String fileNameExt, boolean isLargeFile) {
        List<PreviewDatasetVO> previewDatasetVOS = new ArrayList<>();
        for (Sheet sheet : wk) {
            if (!isLargeFile) {
                checkMultiHead(sheet);
            }
            List<Entity> values = new LinkedList<>();
            List<String> headList = null;
            List<HeadVO> heads = new ArrayList<>();
            /* 空sheet直接跳过 */
            String sheetName = sheet.getSheetName().replaceAll("\\s*", "");
            boolean isFirstRow = true;
            int nowRow = 0, lastLine;
            for (Row row : sheet) {
                /* 当首行为列名时 */
                if (isFirstRow) {
                    lastLine = getRowNumber(row);         //获取第一行的列数
                    isFirstRow = false;
                    if (containsHeader) {
                        headList = setHeader(row, lastLine);
                        continue;
                    } else {
                        headList = setDefaultHeader(lastLine);
                    }
                }
                readRowForPreview(row, headList, values);
                nowRow++;
                if (nowRow > DatasetConstant.EXCEL_DATA_TYPE_CHECK_SIZE) {
                    break;
                }
            }
            if (nowRow == 0) {
                continue;
            }
            /* 生成head */
            for (String field : headList) {
                heads.add(HeadVO.builder()
                        .name(field)
                        .type(StringUtils.lowerCase(JDBCType.VARCHAR.getName()))
                        .build());
            }
            values = DatasetUtil
                    .recommendDataType(heads, values, DatasetConstant.DATA_PREVIEW_SIZE);
            PreviewDatasetVO pv = PreviewDatasetVO.builder().data(values).head(heads)
                    .owner(JwtUtil.getCurrentUserDTO().getName())
                    .fileName(fileNameExt + CharUtil.UNDERLINE + sheetName)
                    .name(sheetName).build();
            previewDatasetVOS.add(pv);
        }
        close(wk);
        return previewDatasetVOS;
    }

    /**
     * 检查是否有多级表头
     *
     * @param sheet
     */
    public static void checkMultiHead(Sheet sheet) {
        if (sheet.getNumMergedRegions() != 0) {
            CellRangeAddress range = sheet.getMergedRegion(0);
            if (range.getFirstRow() == 0 && range.getFirstColumn() != range.getLastColumn()) {
                throw new DataScienceException(80003, sheet.getSheetName() + "表中含有多级表头，无法上传");
            }
        }
    }

    /**
     * 将表存为临时的csv文件
     *
     * @param sheet    表
     * @param fileName 存为的文件名
     */
    public static int readSheet(Sheet sheet, String fileName, boolean containsHeader,
                                String tempfilepath) {
        String tempFileName = fileName + CharUtil.DOT + FileTypeEnum.CSV.getValue();
        OutputStream outputStream = null;
        boolean isFirstRow = true;
        int num = 0;
        int lastLine = 0;
        try {
            outputStream = new FileOutputStream(tempfilepath + tempFileName);
            for (Row row : sheet) {

                if (isFirstRow) {
                    lastLine = getRowNumber(row);
                    isFirstRow = false;
                    if (containsHeader) {
                        outputStream
                                .write(getHeader(row, lastLine).getBytes(StandardCharsets.UTF_8));
                        num++;
                        continue;
                    }
                }
                StringBuilder rowString = readRow(row, lastLine);
                outputStream.write(rowString.toString().getBytes(StandardCharsets.UTF_8));
                num++;

            }
            outputStream.flush();
        } catch (IOException e) {
            logger.error("FileImportUtil.readSheet():" + e.getMessage());
        } finally {
            close(outputStream);
        }
        return num;
    }

    public static void close(OutputStream outputStream) {
        if (outputStream != null) {
            try {
                outputStream.close();
            } catch (IOException e) {
                logger.error(e.getMessage());
            }
        }
    }

    public static void close(InputStream inputStream) {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (IOException e) {
                logger.error(e.getMessage());
            }
        }
    }

    public static void close(Workbook workbook) {
        if (workbook != null) {
            try {
                workbook.close();
            } catch (IOException e) {
                logger.error(e.getMessage());
            }
        }
    }
}
